
import scrapy
import os
from ..items import FreersaklerimageItem


# URL fragments stripped from a remote image URL to obtain its local file name.
_URL_PREFIX = "https://s3.amazonaws.com/damcollections/"
_THUMB_SUFFIX = "/2000/2000_thumb"


def _local_image_path(img_directory, url):
    """Return the local destination path for the image at *url*.

    The remote prefix and the thumbnail suffix are removed from the URL and
    the remainder is appended to *img_directory* with a backslash separator.
    """
    relative = url.replace(_URL_PREFIX, "").replace(_THUMB_SUFFIX, "")
    # Explicit backslash keeps the path string identical on every platform.
    return "{}\\{}".format(img_directory, relative)


def _pending_urls(url_list_file, img_directory):
    """Return the URLs listed in *url_list_file* that have no local file yet.

    The file is read one URL per line. Blank lines are skipped so an empty
    string never ends up among the start URLs.

    Raises:
        OSError: if *url_list_file* cannot be opened.
    """
    pending = []
    # Context manager guarantees the handle is closed after reading.
    with open(url_list_file) as url_file:
        for raw_line in url_file:
            url = raw_line.strip()
            if not url:
                continue
            if not os.path.exists(_local_image_path(img_directory, url)):
                pending.append(url)
    return pending


class FreersacklerImage(scrapy.Spider):
    """Spider that fetches the images listed in a local URL file.

    At class-definition time the URL list is read and every URL whose
    target path does not already exist on disk is queued in ``start_urls``.
    """

    name = 'fimg'
    # Raw strings: the backslashes are path separators, not escape sequences.
    img_directory = r'D:\A\spider\Image\img_denver'
    url_list_file = r"D:\A\spider\img_denver.txt"
    # Read the image URLs from the file, keeping only those not yet on disk.
    start_urls = _pending_urls(url_list_file, img_directory)

    def parse(self, response):
        """Wrap a fetched image response in a ``FreersaklerimageItem``.

        Args:
            response: the response for one of the start URLs; its ``body``
                is stored as the image payload.

        Yields:
            One item with ``imageurl`` (the response URL), ``image`` (the
            response body) and ``imagepath`` (the local destination path
            derived from the URL).
        """
        item = FreersaklerimageItem()
        item['imageurl'] = response.url
        item['image'] = response.body
        item['imagepath'] = _local_image_path(self.img_directory, response.url)
        yield item